# Justifying Cooperation
# Step 01: Read and wrangle data
# Last updated: 08/15/2023


# Initial settings --------------------------------------------------------

library(tidyverse)
library(readstata13)

# Experiment 1 Data ---------------------------------------------------------------

  # * Read in Data -----------------------------------------------------------
# Build the Experiment 1 analysis data set.
# Reads the raw export, keeps rows where Q1464 is "I agree to participate",
# reshapes the per-vignette columns so that each row is one respondent x task,
# and recodes outcomes, treatment amounts, demographics and recall checks.
df1 <- read_csv("../raw_data/GHT_500_AUG.csv") %>%
  filter(Q1464 == "I agree to participate") %>%
  # matches() takes a regular expression, contains() a literal substring
  select(ResponseId, matches("city_(first|second|third)"),
         matches("you_(first|second|third)"),
         contains("_incumbent"), contains("_project"), contains("_netdist"), contains("_netcity"),
         matches("_vote$"), # the trailing $ keeps columns that END in _vote (e.g. drops *_voters)
         education, ethnicity, hhi, political_party, region) %>%

  # * Rename and recode df  -------------------------------------------------

  rename(city_1 = city_first, # new varname = old varname
         city_2 = city_second,
         city_3 = city_third,

         dist_1 = you_first,
         dist_2 = you_second,
         dist_3 = you_third) %>%

  # Pivot longer so the task number can be split out of the column names, then
  # pivot wider again so each respondent x task row has one column per measure.
  # NOTE: city_1:pf3_vote is a positional range; it relies on the column order
  # produced by select() above.
  pivot_longer(names_to = "name", values_to = "values", cols = city_1:pf3_vote) %>%
  filter(!is.na(values)) %>%
  mutate(task = str_extract(name, "\\d+") %>% as.numeric(), # vignette (task) number
         name = str_replace(name, "_\\d|pf\\d_|cf\\d_", ""), # strip the task marker from the name
         name = case_when(name == "incumbent" ~ "eval_incumbent",
                          name == "project" ~ "eval_project",
                          TRUE ~ name)) %>%
  pivot_wider(names_from = "name", values_from = "values") %>%

  # Outcomes: 5-point text scales -> [-1, 1]; labels not listed become NA.
  mutate(eval_incumbent = case_when(eval_incumbent == "Strongly disapprove" ~ -1,
                                    eval_incumbent == "Somewhat disapprove" ~ -.5,
                                    eval_incumbent == "Neither approve nor disapprove" ~ 0,
                                    eval_incumbent == "Somewhat approve" ~ .5,
                                    eval_incumbent == "Strongly approve" ~ 1),
         eval_project = case_when(eval_project == "Strongly disapprove" ~ -1,
                                  eval_project == "Somewhat disapprove" ~ -.5,
                                  eval_project == "Neither approve nor disapprove" ~ 0,
                                  eval_project == "Somewhat approve" ~ .5,
                                  eval_project == "Strongly approve" ~ 1),
         # vote_inc must be computed BEFORE vote is overwritten below:
         # 1 = would (certainly/probably) vote for the incumbent, 0 otherwise
         vote_inc = case_when(vote=="I would certainly vote for the incumbent" ~ 1,
                              vote=="I would probably vote for the incumbent" ~ 1,
                              vote=="I would probably vote for the challenger" ~ 0,
                              vote=="I would certainly vote for the challenger" ~ 0,
                              vote=="I'd be equally likely to vote for either candidate" ~ 0),
         vote = case_when(vote == "I would certainly vote for the challenger" ~ -1,
                          vote == "I would probably vote for the challenger" ~ -.5,
                          vote == "I'd be equally likely to vote for either candidate" ~ 0,
                          vote == "I would probably vote for the incumbent" ~ .5,
                          vote == "I would certainly vote for the incumbent" ~ 1)) %>%

  # Treatment amounts as ordered category codes (still text at this point;
  # amounts in parentheses are the negative ones).
  mutate(paydist_cat = case_when(dist == "($1,250,000.00)" ~ 1,
                                 dist == "($625,000.00)" ~ 2,
                                 dist == "($250,000.00)" ~ 3,
                                 dist == "($125,000.00)" ~ 4,
                                 dist == "($25,000.00)" ~ 5,
                                 dist == "$0.00" ~ 6,
                                 dist == "$25,000.00" ~ 7,
                                 dist == "$125,000.00" ~ 8,
                                 dist == "$250,000.00" ~ 9,
                                 dist == "$625,000.00" ~ 10,
                                 dist == "$1,250,000.00" ~ 11),
         paycity_cat = case_when(city == "($12,500,000.00)" ~ 1,
                                 city == "($10,000,000.00)" ~ 2,
                                 city == "($7,500,000.00)" ~ 3,
                                 city == "($6,250,000.00)" ~ 4,
                                 city == "($5,000,000.00)" ~ 5,
                                 city == "($3,750,000.00)" ~ 6,
                                 city == "($2,500,000.00)" ~ 7,
                                 city == "($1,250,000.00)" ~ 8,
                                 city == "($250,000.00)" ~ 9,
                                 city == "($25,000.00)" ~ 10,
                                 city == "$0.00" ~ 11,
                                 city == "$25,000.00" ~ 12,
                                 city == "$250,000.00" ~ 13,
                                 city == "$1,250,000.00" ~ 14,
                                 city == "$2,500,000.00" ~ 15,
                                 city == "$3,750,000.00" ~ 16,
                                 city == "$5,000,000.00" ~ 17,
                                 city == "$6,250,000.00" ~ 18,
                                 city == "$7,500,000.00" ~ 19,
                                 city == "$10,000,000.00" ~ 20,
                                 city == "$12,500,000.00" ~ 21)) %>%

  # Accounting format -> number: "($25,000.00)" becomes "-25,000.00)" and
  # parse_number() then yields -25000.
  mutate(city = str_replace(city, "\\(\\$", "-"),
         city = parse_number(city),
         dist = str_replace(dist, "\\(\\$", "-"),
         dist = parse_number(dist)) %>%

  # Indicator variables. TRUE ~ 0 means a missing amount is coded 0, not NA.
  mutate(cityabovezero = case_when(city > 0 ~ 1,
                                   TRUE ~ 0),
         cityatorabovezero = case_when(city >= 0 ~1,
                                       TRUE ~ 0),
         distabovezero = case_when(dist > 0 ~ 1,
                                   TRUE ~ 0),
         distatorabovezero = case_when(dist >= 0 ~ 1,
                                       TRUE ~ 0),
         distworsecity = case_when(dist/25000 < city/250000 ~ 1,
                                   TRUE ~ 0),
         # rescaled amounts (presumably per-capita; divisors presumably the
         # district / city population sizes -- TODO confirm against the design)
         city_pc = city/250000,
         dist_pc = dist/25000,
         # treat task number and payoff categories as categorical variables
         task = factor(task),
         paydist_cat = factor(paydist_cat),
         paycity_cat = factor(paycity_cat)) %>%

  # Demographics. -3105 is recoded to missing.
  mutate(education = case_when(education == -3105 ~ NA_real_,
                               TRUE ~ education),
         non_white = case_when(ethnicity != 1 ~ 1,
                               ethnicity == 1 ~ 0),
         hhi = case_when(hhi == -3105 ~ NA_real_,
                         TRUE ~ hhi),
         # codes 6-10 are shifted down to 3-7; all other codes are kept as is
         pid7 = case_when(political_party==6 ~ 3,
                          political_party==7 ~ 4,
                          political_party==8 ~ 5,
                          political_party==9 ~ 6,
                          political_party==10 ~ 7,
                          TRUE ~ political_party),
         region = factor(region)) %>%
  mutate(big_dist_return = case_when(dist_pc >= 10 ~ 1,
                                     TRUE ~ 0),
         vote100 = vote_inc*100) %>%

  # Recall checks: 1 if the reported sign of the return matches the sign of the
  # assigned amount, 0 if not, NA if the question was not answered.
  # NA_real_ (not NA_integer_) keeps every right-hand side a double, which
  # case_when() in dplyr < 1.1.0 requires. (& binds tighter than |.)
  mutate(dist_correct = case_when(is.na(netdist) ~ NA_real_,
                                  dist_pc > 0 & netdist == "Net positive return" |
                                    dist_pc == 0 & netdist == "Net neutral return" |
                                    dist_pc < 0 & netdist == "Net negative return" ~ 1,
                                  TRUE ~ 0),
         city_correct = case_when(is.na(netcity) ~ NA_real_,
                                  city_pc > 0 & netcity == "Net positive return" |
                                    city_pc == 0 & netcity == "Net neutral return" |
                                    city_pc < 0 & netcity == "Net negative return" ~ 1,
                                  TRUE ~ 0)) %>%

  # * Standardize data ------------------------------------------------------

  # keep only respondent x task rows with all three outcomes observed
  filter(!is.na(eval_incumbent) &
           !is.na(eval_project) &
           !is.na(vote))



# Experiment 1 Replication Data -------------------------------------------


  # * Read in Data ----------------------------------------------------------
# Build the first replication data set of Experiment 1.
# Same reshaping and recoding steps as the Experiment 1 pipeline, except that
# negative amounts arrive with a "'-$" prefix and the payoff categories are
# therefore assigned after the amounts have been converted to numbers.
df1R1 <- read_csv("../raw_data/November Replication_November 20, 2020_07.43.csv") %>%
  # matches() takes a regular expression, contains() a literal substring
  select(ResponseId, method, money, matches("city_(first|second|third)"),
         matches("you_(first|second|third)"),
         contains("_incumbent"), contains("_project"), contains("_netdist"), contains("_netcity"),
         matches("_vote$"), # the trailing $ keeps columns that END in _vote
         pf3_pull...63, # renamed to pf3_vote below (the "...63" suffix presumably comes from readr de-duplicating a repeated header -- confirm)
         education, ethnicity, hhi, political_party, region) %>%

  # * Rename and recode df  -------------------------------------------------

  rename(city_1 = city_first, # new varname = old varname
         city_2 = city_second,
         city_3 = city_third,

         dist_1 = you_first,
         dist_2 = you_second,
         dist_3 = you_third,

         pf3_vote = pf3_pull...63) %>%

  # Pivot longer so the task number can be split out of the column names, then
  # pivot wider again so each respondent x task row has one column per measure.
  # NOTE: city_1:pf3_vote is a positional range; it relies on the column order
  # produced by select() above.
  pivot_longer(names_to = "name", values_to = "values", cols = city_1:pf3_vote) %>%
  filter(!is.na(values))  %>%
  mutate(task = str_extract(name, "\\d+") %>% as.numeric(), # vignette (task) number
         name = str_replace(name, "_\\d|pf\\d_|cf\\d_", ""), # strip the task marker from the name
         name = case_when(name == "incumbent" ~ "eval_incumbent",
                          name == "project" ~ "eval_project",
                          TRUE ~ name)) %>%
  pivot_wider(names_from = "name", values_from = "values") %>%

  # Amounts -> numbers: a leading "'-$" is reduced to "-" so that
  # parse_number() returns a negative value.
  mutate(city = str_replace(city, "'-\\$", "-"),
         city = parse_number(city),
         dist = str_replace(dist, "'-\\$", "-"),
         dist = parse_number(dist)) %>%

  # Outcomes: 5-point text scales -> [-1, 1]; labels not listed become NA.
  mutate(eval_incumbent = case_when(eval_incumbent == "Strongly disapprove" ~ -1,
                                    eval_incumbent == "Somewhat disapprove" ~ -.5,
                                    eval_incumbent == "Neither approve nor disapprove" ~ 0,
                                    eval_incumbent == "Somewhat approve" ~ .5,
                                    eval_incumbent == "Strongly approve" ~ 1),
         eval_project = case_when(eval_project == "Strongly disapprove" ~ -1,
                                  eval_project == "Somewhat disapprove" ~ -.5,
                                  eval_project == "Neither approve nor disapprove" ~ 0,
                                  eval_project == "Somewhat approve" ~ .5,
                                  eval_project == "Strongly approve" ~ 1),
         # vote_inc must be computed BEFORE vote is overwritten below:
         # 1 = would (certainly/probably) vote for the incumbent, 0 otherwise
         vote_inc = case_when(vote=="I would certainly vote for the incumbent" ~ 1,
                              vote=="I would probably vote for the incumbent" ~ 1,
                              vote=="I would probably vote for the challenger" ~ 0,
                              vote=="I would certainly vote for the challenger" ~ 0,
                              vote=="I'd be equally likely to vote for either candidate" ~ 0),
         vote = case_when(vote == "I would certainly vote for the challenger" ~ -1,
                          vote == "I would probably vote for the challenger" ~ -.5,
                          vote == "I'd be equally likely to vote for either candidate" ~ 0,
                          vote == "I would probably vote for the incumbent" ~ .5,
                          vote == "I would certainly vote for the incumbent" ~ 1)) %>%

  # Treatment amounts as ordered category codes (amounts are numeric here).
  mutate(paydist_cat = case_when(dist == -1250000 ~ 1,
                                 dist == -625000 ~ 2,
                                 dist == -250000 ~ 3,
                                 dist == -125000 ~ 4,
                                 dist == -25000 ~ 5,
                                 dist == 0 ~ 6,
                                 dist == 25000 ~ 7,
                                 dist == 125000 ~ 8,
                                 dist == 250000 ~ 9,
                                 dist == 625000 ~ 10,
                                 dist == 1250000 ~ 11),
         paycity_cat = case_when(city == -12500000 ~ 1,
                                 city == -10000000 ~ 2,
                                 city == -7500000 ~ 3,
                                 city == -6250000 ~ 4,
                                 city == -5000000 ~ 5,
                                 city == -3750000 ~ 6,
                                 city == -2500000 ~ 7,
                                 city == -1250000 ~ 8,
                                 city == -250000 ~ 9,
                                 city == -25000 ~ 10,
                                 city == 0 ~ 11,
                                 city == 25000 ~ 12,
                                 city == 250000 ~ 13,
                                 city == 1250000 ~ 14,
                                 city == 2500000 ~ 15,
                                 city == 3750000 ~ 16,
                                 city == 5000000 ~ 17,
                                 city == 6250000 ~ 18,
                                 city == 7500000 ~ 19,
                                 city == 10000000 ~ 20,
                                 city == 12500000 ~ 21)) %>%

  # Indicator variables. TRUE ~ 0 means a missing amount is coded 0, not NA.
  mutate(cityabovezero = case_when(city > 0 ~ 1,
                                   TRUE ~ 0),
         cityatorabovezero = case_when(city >= 0 ~1,
                                       TRUE ~ 0),
         distabovezero = case_when(dist > 0 ~ 1,
                                   TRUE ~ 0),
         distatorabovezero = case_when(dist >= 0 ~ 1,
                                       TRUE ~ 0),
         distworsecity = case_when(dist/25000 < city/250000 ~ 1,
                                   TRUE ~ 0),
         # rescaled amounts (presumably per-capita; divisors presumably the
         # district / city population sizes -- TODO confirm against the design)
         city_pc = city/250000,
         dist_pc = dist/25000,
         vote100 = vote_inc*100,
         # treat task number and payoff categories as categorical variables
         task = factor(task),
         paydist_cat = factor(paydist_cat),
         paycity_cat = factor(paycity_cat)) %>%

  # Demographics. -3105 is recoded to missing.
  mutate(education = case_when(education == -3105 ~ NA_real_,
                               TRUE ~ education),
         non_white = case_when(ethnicity != 1 ~ 1,
                               ethnicity == 1 ~ 0),
         hhi = case_when(hhi == -3105 ~ NA_real_,
                         TRUE ~ hhi),
         # codes 6-10 are shifted down to 3-7; all other codes are kept as is
         pid7 = case_when(political_party==6 ~ 3,
                          political_party==7 ~ 4,
                          political_party==8 ~ 5,
                          political_party==9 ~ 6,
                          political_party==10 ~ 7,
                          TRUE ~ political_party),
         region = factor(region)) %>%

  # Recall checks: 1 if the reported sign of the return matches the sign of the
  # assigned amount, 0 if not, NA if the question was not answered.
  # NA_real_ (not NA_integer_) keeps every right-hand side a double, which
  # case_when() in dplyr < 1.1.0 requires. (& binds tighter than |.)
  mutate(dist_correct = case_when(is.na(netdist) ~ NA_real_,
                                  dist_pc > 0 & netdist == "Net positive return" |
                                    dist_pc == 0 & netdist == "Net neutral return" |
                                    dist_pc < 0 & netdist == "Net negative return" ~ 1,
                                  TRUE ~ 0),
         city_correct = case_when(is.na(netcity) ~ NA_real_,
                                  city_pc > 0 & netcity == "Net positive return" |
                                    city_pc == 0 & netcity == "Net neutral return" |
                                    city_pc < 0 & netcity == "Net negative return" ~ 1,
                                  TRUE ~ 0)) %>%

  # * Standardize data ------------------------------------------------------

  # keep only respondent x task rows with all three outcomes observed
  filter(!is.na(eval_incumbent) &
           !is.na(eval_project) &
           !is.na(vote))


# Experiment 1 Replication 2 Data -------------------------------------------

  # * Read in Data ----------------------------------------------------------

# Respondent-level demographics for the second replication, keyed by
# RESPONDENT_ID. Read as its own statement (it used to be assigned inside the
# left_join() call below); the global df1R2_demo is still created.
df1R2_demo <- read_csv("../raw_data/Justifying Cooperative Distribution Replication.csv") %>%
  # code 99 is treated as missing; party code 98 is folded into the midpoint 4
  mutate(non_white = case_when(ethnicity == 99 ~ NA_real_,
                               ethnicity == 1 ~ 0,
                               TRUE ~ 1),
         hhi = case_when(income == 99 ~ NA_real_,
                         TRUE ~ income),
         pid7 = case_when(political_party_preference == 99 ~ NA_real_,
                          political_party_preference == 98 ~ 4,
                          TRUE ~ political_party_preference),
         region = factor(region)) %>%
  select(RESPONDENT_ID = id, education, non_white, hhi, pid7, region)

# Build the second replication data set of Experiment 1 (five vignettes per
# respondent). Amounts in this file are already on the scale that the other
# data sets call city_pc / dist_pc, so they are renamed rather than rescaled.
df1R2 <- read_csv("../raw_data/Justifying+Cooperation+New+March+2023+Experiment_May+24%2C+2023_03.23.csv") %>%
  # drop the first two data rows (presumably the extra Qualtrics header rows -- confirm)
  slice(3:n()) %>%
  filter(consent == "I agree to participate" & method == "Because he left his ID") %>%
  # matches() takes a regular expression, contains() a literal substring
  select(ResponseId, RESPONDENT_ID, consent, method, money, aggregate_order, matches("city_(first|second|third|fourth|fifth)"),
         matches("you_(first|second|third|fourth|fifth)"),
         contains("_incumbent"), contains("_project"), contains("_netdist"), contains("_netcity"),
         matches("_vote$")) %>% # the trailing $ keeps columns that END in _vote

  # * Rename and recode df  -------------------------------------------------

  rename(city_1 = city_first, # new varname = old varname
         city_2 = city_second,
         city_3 = city_third,
         city_4 = city_fourth,
         city_5 = city_fifth,

         dist_1 = you_first,
         dist_2 = you_second,
         dist_3 = you_third,
         dist_4 = you_fourth,
         dist_5 = you_fifth) %>%

  # Pivot longer so the task number can be split out of the column names, then
  # pivot wider again so each respondent x task row has one column per measure.
  # NOTE: city_1:pf5_vote is a positional range; it relies on the column order
  # produced by select() above.
  pivot_longer(names_to = "name", values_to = "values", cols = city_1:pf5_vote) %>%
  filter(!is.na(values)) %>%
  mutate(task = str_extract(name, "\\d+") %>% as.numeric(), # vignette (task) number
         name = str_replace(name, "_\\d|pf\\d_|cf\\d_", ""), # strip the task marker from the name
         name = case_when(name == "incumbent" ~ "eval_incumbent",
                          name == "project" ~ "eval_project",
                          TRUE ~ name)) %>%
  pivot_wider(names_from = "name", values_from = "values") %>%

  # amounts are stored as text; extract the number
  mutate(city = parse_number(city),
         dist = parse_number(dist)) %>%

  # Outcomes: 5-point text scales -> [-1, 1]; labels not listed become NA.
  mutate(eval_incumbent = case_when(eval_incumbent == "Strongly disapprove" ~ -1,
                                    eval_incumbent == "Somewhat disapprove" ~ -.5,
                                    eval_incumbent == "Neither approve nor disapprove" ~ 0,
                                    eval_incumbent == "Somewhat approve" ~ .5,
                                    eval_incumbent == "Strongly approve" ~ 1),
         eval_project = case_when(eval_project == "Strongly disapprove" ~ -1,
                                  eval_project == "Somewhat disapprove" ~ -.5,
                                  eval_project == "Neither approve nor disapprove" ~ 0,
                                  eval_project == "Somewhat approve" ~ .5,
                                  eval_project == "Strongly approve" ~ 1),
         # vote_inc must be computed BEFORE vote is overwritten below:
         # 1 = would (certainly/probably) vote for the incumbent, 0 otherwise
         vote_inc = case_when(vote=="I would certainly vote for the incumbent" ~ 1,
                              vote=="I would probably vote for the incumbent" ~ 1,
                              vote=="I would probably vote for the challenger" ~ 0,
                              vote=="I would certainly vote for the challenger" ~ 0,
                              vote=="I'd be equally likely to vote for either candidate" ~ 0),
         vote = case_when(vote == "I would certainly vote for the challenger" ~ -1,
                          vote == "I would probably vote for the challenger" ~ -.5,
                          vote == "I'd be equally likely to vote for either candidate" ~ 0,
                          vote == "I would probably vote for the incumbent" ~ .5,
                          vote == "I would certainly vote for the incumbent" ~ 1)) %>%

  # Treatment amounts as ordered category codes.
  # NOTE(review): the codes jump from 19 to 21 (there is no 20). Because the
  # variable becomes a factor this only affects the level labels; it is left
  # unchanged in case downstream code refers to those labels.
  mutate(paydist_cat = case_when(dist == -55 ~ 1,
                                 dist == -50 ~ 2,
                                 dist == -45 ~ 3,
                                 dist == -30 ~ 4,
                                 dist == -25 ~ 5,
                                 dist == -20 ~ 6,
                                 dist == -15 ~ 7,
                                 dist == -10 ~ 8,
                                 dist == -5 ~ 9,
                                 dist == -2 ~ 10,
                                 dist == -.5 ~ 11,
                                 dist == -.1 ~ 12,
                                 dist == 0 ~ 13,
                                 dist == .1 ~ 14,
                                 dist == .5 ~ 15,
                                 dist == 2 ~ 16,
                                 dist == 5 ~ 17,
                                 dist == 10 ~ 18,
                                 dist == 15 ~ 19,
                                 dist == 20 ~ 21,
                                 dist == 25 ~ 22,
                                 dist == 30 ~ 23,
                                 dist == 45 ~ 24,
                                 dist == 50 ~ 25,
                                 dist == 55 ~ 26),
         paycity_cat = case_when(city == 5 ~ 1,
                                 city == 10 ~ 2,
                                 city == 15 ~ 3,
                                 city == 25 ~ 4,
                                 city == 50 ~ 5)) %>%

  # Indicator variables. TRUE ~ 0 means a missing amount is coded 0, not NA.
  mutate(distabovezero = case_when(dist > 0 ~ 1,
                                   TRUE ~ 0),
         distatorabovezero = case_when(dist >= 0 ~ 1,
                                       TRUE ~ 0),
         # dist and city are already on the same (_pc) scale in this data set
         # (they are renamed to dist_pc / city_pc just below), so they are
         # compared directly. The previous dist/25000 < city/250000 was carried
         # over from the dollar-denominated data sets and amounted to
         # 10 * dist < city here.
         distworsecity = case_when(dist < city ~ 1,
                                   TRUE ~ 0),
         vote100 = vote_inc*100,
         # treat task number and payoff categories as categorical variables
         task = factor(task),
         paydist_cat = factor(paydist_cat),
         paycity_cat = factor(paycity_cat)) %>%
  rename(city_pc = city,
         dist_pc = dist) %>%

  # Recall checks: 1 if the reported sign of the return matches the assigned
  # amount, 0 if not, NA if the question was not answered. Only positive city
  # amounts are coded above, so city_correct only checks for "Net positive return".
  # NA_real_ (not NA_integer_) keeps every right-hand side a double, which
  # case_when() in dplyr < 1.1.0 requires. (& binds tighter than |.)
  mutate(dist_correct = case_when(is.na(netdist) ~ NA_real_,
                                  dist_pc > 0 & netdist == "Net positive return" |
                                    dist_pc == 0 & netdist == "Net neutral return" |
                                    dist_pc < 0 & netdist == "Net negative return" ~ 1,
                                  TRUE ~ 0),
         city_correct = case_when(is.na(netcity) ~ NA_real_,
                                  netcity == "Net positive return" ~ 1,
                                  TRUE ~ 0),
         aggregate_order = case_when(aggregate_order == "county" ~ "County",
                                     aggregate_order == "state" ~ "State")) %>%

  # attach demographics; RESPONDENT_ID is the only column the two tables share
  left_join(df1R2_demo, by = "RESPONDENT_ID") %>%

  # * Standardize data ------------------------------------------------------

  # keep only respondent x task rows with all three outcomes observed
  filter(!is.na(eval_incumbent) &
           !is.na(eval_project) &
           !is.na(vote))




# Experiment 2 Data ------------------------------------------------------------


  # * Read data -------------------------------------------------------------

# Experiment 2 had two waves -- load each wave in separately with an indicator for which wave it was

# Wave 1 (Stata file): its first_field column becomes the wave indicator
# `fielding`; only rows whose district amount is one of the six listed
# values are kept.
df2.1 <- read.dta13("../raw_data/first_field_yougov.dta") %>%
  rename(fielding = first_field) %>%
  select(consent, you_1, q5:q27, city_1, return_avg:return_person, critique_assign, fielding,
         educ, race2, faminc_new, pid7, res_region) %>%
  filter(you_1 %in% c("$0", "$1,000", "$100,000", "$25,000", "-$1,000", "-$25,000"))

# Wave 2 (csv file): the wave indicator is set to 2; columns in the q5:q27
# range whose names contain an underscore are dropped.
df2.2 <- read_csv("../raw_data/yale_REFIELD-final_20200810.csv") %>%
  mutate(fielding = 2) %>%
  select(consent, q5:q27, -contains("_"), you_1, city_1,
         return_avg:return_person, critique_assign, fielding,
         educ, race2, faminc_new, pid7, res_region)

# The literal string "NA" stands for a missing value in both waves
df2.1[df2.1 == "NA"] <- NA
df2.2[df2.2 == "NA"] <- NA

# Stack the waves (rbind matches data-frame columns by name), keep consenting
# respondents, and drop the consent column, which is then no longer needed.
df2 <- rbind(df2.1, df2.2)
df2 <- df2 %>%
  filter(consent == "I agree to participate") %>%
  select(-consent)

# the per-wave objects are not used again
rm(list = c("df2.1", "df2.2"))


  # * Rename and recode -----------------------------------------------------
# Recode the stacked Experiment 2 data: collapse the per-condition question
# columns into single outcome variables, convert amounts to numbers, build the
# treatment indicators, harmonise demographics with the Experiment 1 data
# sets, and score the attention checks.
df2 <- df2 %>%
  # each outcome was asked in one of four columns; coalesce() takes the first
  # non-missing one
  mutate(councilor_thermometer = coalesce(q5, q14, q18, q27),
         project_thermometer = coalesce(q6, q15, q17, q26),
         vote = coalesce(q8, q11, q21, q24),
         approve = coalesce(q9, q12, q20, q23)) %>%
  # Outcomes: text scales -> numeric; labels not listed become NA.
  mutate(approve_num = case_when(approve =="Strongly disapprove" ~ 1,
                                 approve=="Somewhat disapprove" ~ 2,
                                 approve=="Neither disapprove nor approve" ~ 3,
                                 approve=="Somewhat approve" ~ 4,
                                 approve=="Strongly approve" ~ 5),
         approve_project_bin = case_when(approve=="Somewhat approve" ~ 1,
                                         approve=="Strongly approve" ~ 1,
                                         approve=="Somewhat disapprove" ~ 0,
                                         approve=="Strongly disapprove" ~ 0,
                                         approve=="Neither disapprove nor approve" ~ 0),
         vote_num = case_when(vote=="I would certainly vote for the incumbent" ~ 5,
                              vote=="I would probably vote for the incumbent" ~ 4,
                              vote=="I'd be equally likely to vote for either candidate" ~ 3,
                              vote=="I would probably vote for the challenger" ~ 2,
                              vote=="I would certainly vote for the challenger" ~ 1),
         vote_inc = case_when(vote=="I would certainly vote for the incumbent" ~ 1,
                              vote=="I would probably vote for the incumbent" ~ 1,
                              vote=="I would probably vote for the challenger" ~ 0,
                              vote=="I would certainly vote for the challenger" ~ 0,
                              vote=="I'd be equally likely to vote for either candidate" ~ 0)) %>%
  rename(city = city_1,
         dist = you_1) %>%
  # amounts: strip "$" and "," so that e.g. "-$1,000" becomes -1000
  mutate(city = as.numeric(gsub("[$,]", "", city)),
         dist = as.numeric(gsub("[$,]", "", dist))) %>%
  # make councilor/project thermometer numeric
  mutate(councilor_thermometer = as.numeric(councilor_thermometer),
         project_thermometer = as.numeric(project_thermometer)) %>%
  # rescaled amounts (presumably per-capita; divisors presumably the city /
  # district population sizes -- TODO confirm against the design)
  mutate(city_pc = city/250000,
         dist_pc = dist/25000,
         dist_worse_than_city = case_when(dist_pc < city_pc ~ 1,
                                          TRUE ~ 0)) %>%
  mutate(critique_offered = case_when(critique_assign == "N" ~ "Control",
                                      critique_assign == "G" ~ "Generic",
                                      critique_assign == "D" ~ "Bad Deal",
                                      critique_assign == "F" ~ "Fair Share")) %>%
  # Indicator variables. TRUE ~ 0 means a missing amount is coded 0, not NA.
  # (distworsecity duplicates dist_worse_than_city under the name used in the
  # Experiment 1 data sets.)
  mutate(distatorabovezero = case_when(dist_pc>=0 ~ 1,
                                       TRUE ~ 0),
         distabovezero = case_when(dist_pc>0 ~ 1,
                                   TRUE ~ 0),
         distworsecity= case_when(dist_pc<city_pc ~ 1,
                                  TRUE ~ 0)) %>%
  # Critique treatments, split by whether the critique applies to the amounts
  # shown: "Bad Deal" is coded germane when the district amount is negative,
  # "Fair Share" when the district does worse than the city.
  mutate(T_generic = case_when(critique_offered == "Generic" ~ 1,
                               TRUE ~ 0),
         T_baddealdist_notgermane = case_when(critique_offered == "Bad Deal" &
                                                dist_pc >= 0 ~ 1,
                                              TRUE ~ 0),
         T_baddealdist_germane = case_when(critique_offered == "Bad Deal" &
                                             dist_pc < 0 ~ 1,
                                           TRUE ~ 0),
         T_notfairshare_notgermane = case_when(critique_offered == "Fair Share" &
                                                 dist_pc>= city_pc ~ 1,
                                               TRUE ~ 0),
         T_notfairshare_germane = case_when(critique_offered == "Fair Share" &
                                              dist_pc < city_pc ~ 1,
                                            TRUE ~ 0)) %>%
  # Treatment amounts as ordered category codes, stored as factors.
  mutate(paydist_cat = case_when(dist == -25000 ~ 1,
                                 dist == -1000 ~ 2,
                                 dist == 0 ~ 3,
                                 dist == 1000 ~ 4,
                                 dist == 25000 ~ 5,
                                 dist == 100000 ~ 6),
         paycity_cat = case_when(city == 10000 ~ 1,
                                 city == 250000 ~ 2,
                                 city == 1000000 ~ 3),
         paydist_cat = factor(paydist_cat),
         paycity_cat = factor(paycity_cat)) %>%
  # Demographics, recoded to numeric.
  mutate(education = case_when(educ == "No HS" ~ 1,
                               educ == "High school graduate" ~ 2,
                               educ == "Some college" ~ 3,
                               educ == "2-year" ~ 4,
                               educ == "4-year" ~ 5,
                               educ == "Post-grad" ~ 6),
         # 1 = non-white, 0 = white, the same direction as non_white in the
         # Experiment 1 data sets (the two codes were previously swapped here)
         non_white = case_when(race2 == "Non-white" ~ 1,
                               race2 == "White" ~ 0),
         hhi = case_when(faminc_new == "Less than $10,000" ~ 1,
                         faminc_new == "$10,000 - $19,999" ~ 2,
                         faminc_new == "$20,000 - $29,999" ~ 3,
                         faminc_new == "$30,000 - $39,999" ~ 4,
                         faminc_new == "$40,000 - $49,999" ~ 5,
                         faminc_new == "$50,000 - $59,999" ~ 6,
                         faminc_new == "$60,000 - $69,999" ~ 7,
                         faminc_new == "$70,000 - $79,999" ~ 8,
                         faminc_new == "$80,000 - $99,999" ~ 9,
                         faminc_new == "$100,000 - $119,999" ~ 10,
                         faminc_new == "$120,000 - $149,999" ~ 11,
                         faminc_new == "$150,000 - $199,999" ~ 12,
                         faminc_new == "$200,000 - $249,999" ~ 13,
                         faminc_new == "$250,000 - $349,999" ~ 14,
                         faminc_new == "$350,000 - $499,999" ~ 15,
                         faminc_new == "$500,000 or more" ~ 16,
                         faminc_new == "Prefer not to say" ~ NA_real_),
         # 1 = Strong Republican ... 7 = Strong Democrat; "Not sure" is placed
         # at the midpoint together with "Independent"
         pid7 = case_when(pid7 == "Strong Republican" ~ 1,
                          pid7 == "Not very strong Republican" ~ 2,
                          pid7 == "Lean Republican" ~ 3,
                          pid7 == "Independent" |
                            pid7 == "Not sure" ~ 4,
                          pid7 == "Lean Democrat" ~ 5,
                          pid7 == "Not very strong Democrat" ~ 6,
                          pid7 == "Strong Democrat" ~ 7),
         region = case_when(res_region == "Northeast" ~ 1,
                            res_region == "Midwest" ~ 2,
                            res_region == "South" ~ 3,
                            res_region == "West" ~ 4)) %>%

  # Recall checks: 1 if the reported sign of the return matches the sign of the
  # assigned amount, 0 if not, NA if the question was not answered.
  # NA_real_ (not NA_integer_) keeps every right-hand side a double, which
  # case_when() in dplyr < 1.1.0 requires. (& binds tighter than |.)
  mutate(dist_correct = case_when(is.na(return_district) ~ NA_real_,
                                  dist_pc > 0 & return_district == "Net positive return" |
                                    dist_pc == 0 & return_district == "Net neutral return" |
                                    dist_pc < 0 & return_district == "Net negative return" ~ 1,
                                  TRUE ~ 0),
         city_correct = case_when(is.na(return_city) ~ NA_real_,
                                  city_pc > 0 & return_city == "Net positive return" |
                                    city_pc == 0 & return_city == "Net neutral return" |
                                    city_pc < 0 & return_city == "Net negative return" ~ 1,
                                  TRUE ~ 0)) %>%

  # put city and dist first and drop the raw question / demographic columns
  select(city, dist, everything(), -(q5:q27),
         -c(educ, race2, faminc_new, res_region), dist_correct, city_correct) %>%

  # * Make attention check variables ------------------------------------------

  # first attention check: district-vs-city comparison answered correctly
  mutate(city_dist_compare = case_when(str_detect(return_avg, "better") &
                                         dist_pc > city_pc ~ 1,
                                       str_detect(return_avg, "worse") &
                                         dist_pc < city_pc ~ 1,
                                       str_detect(return_avg, "equal") &
                                         dist_pc == city_pc ~ 1,
                                       TRUE ~ 0)) %>%
  # second attention check: city return reported as positive
  mutate(city_return = case_when(return_city == "Net positive return" ~ 1,
                                 TRUE ~ 0)) %>%
  # third attention check: district return matches the sign of the amount
  mutate(district_return = case_when(return_district=="Net negative return" & dist_pc<0 ~ 1,
                                     return_district=="Net positive return" & dist_pc>0 ~ 1,
                                     return_district=="Net neutral return" & dist_pc==0 ~ 1,
                                     TRUE ~ 0)) %>%
  # fourth attention check: personal return reported as positive
  mutate(person_return = case_when(return_person == "Net positive return" ~ 1,
                                   TRUE ~ 0)) %>%
  # combined attention check score: number of checks passed (0-4)
  mutate(all_four_right = city_dist_compare + city_return +
           district_return + person_return) %>%

  # * Standardize dataset ---------------------------------------------------

  # keep only respondents with all four outcomes observed
  filter(!is.na(councilor_thermometer) &
           !is.na(approve_num) &
           !is.na(vote_num) &
           !is.na(project_thermometer))

# Save data ---------------------------------------------------------------

# Write each cleaned data set to its own .RData file in the working directory.
# Objects are passed by name, so load() restores them as df1, df1R1, df1R2, df2.
save(list = "df1", file = "cleaned_exp1.RData")
save(list = "df1R1", file = "cleaned_expR1.RData")
save(list = "df1R2", file = "cleaned_expR2.RData")
save(list = "df2", file = "cleaned_exp2.RData")



